# Add 'hide code' button
# The HTML snippet below injects a small script into the rendered notebook:
# `code_toggle()` hides or shows every `div.input` element and flips the
# `code_show` flag, and it is registered to run once when the document is
# ready, so the code cells start out hidden. The link in the trailing text
# calls `code_toggle()` again to switch the cells back on or off.
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
The raw code for this IPython notebook is by default hidden for easier reading.
To toggle on/off the raw code, click <a href="javascript:code_toggle()">here</a>.''')
import pandas as pd
import plotly.graph_objects as go
import plotly
import numpy as np
import datetime
import sys
from os import path
import plotly.express as px
### Parameter
# Base URL that the relative CSSE paths below are appended to when downloading.
csse_github_covid19_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
# Relative path of the global confirmed-cases time series; the same string is
# also used as the local file path when the download is written to disk.
confirmed_global_cases_url = "csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
# Relative path of the UID/ISO/FIPS lookup table; also used as the local
# cache path for that table.
UID_ISO_FIPS_LookUp_Table_url = "csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
# Full URL of the Our World in Data CSV (read directly, not cached on disk).
covid19_testing_data = "https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv"
# Relative path of the US confirmed-cases time series (appended to the base URL).
covid19_US_data = 'csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
Some boring data wrangling: cleaning the data, reshaping it for easier plotting, and deriving new information.
# Function to get the newest Data from "CSSE – Center For Systems Science and Engineering at JHU"
def get_lookup_table():
    """Return the CSSE UID/ISO/FIPS lookup table as a DataFrame.

    If a cached copy exists at ``UID_ISO_FIPS_LookUp_Table_url`` (used as a
    local relative path) it is read from disk. Otherwise the table is
    downloaded from ``csse_github_covid19_url``, written to that path and
    returned.

    Returns:
        pandas.DataFrame: the lookup table.
    """
    import os  # local import: the file itself only imports ``path`` from ``os``

    if path.exists(UID_ISO_FIPS_LookUp_Table_url):
        print("Data already exists on disk.")
        return pd.read_csv(UID_ISO_FIPS_LookUp_Table_url)

    url = csse_github_covid19_url + UID_ISO_FIPS_LookUp_Table_url
    csse_data = pd.read_csv(url)
    # The cache path contains a directory component; create it first so that
    # ``to_csv`` does not fail on a fresh checkout.
    cache_dir = path.dirname(UID_ISO_FIPS_LookUp_Table_url)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    csse_data.to_csv(UID_ISO_FIPS_LookUp_Table_url, index=False)
    print("Translation table succesfully downloaded.")
    return csse_data
def nvl(a, b):
    """Return ``a`` unless it is a missing value, in which case return ``b``.

    Missing means ``None``, ``NaN`` or ``NaT`` as recognised by ``pd.isna``.
    Unlike ``np.isnan`` this does not raise for ``None`` or for non-numeric
    scalars such as strings.

    Args:
        a: scalar value to test.
        b: fallback returned when ``a`` is missing.
    """
    return b if pd.isna(a) else a
def update_covid19_global_data():
    """Download the global confirmed-cases time series and cache it on disk.

    The CSV is fetched from ``csse_github_covid19_url`` and written to
    ``confirmed_global_cases_url`` (used as a local relative path). The
    update is best effort: any failure is printed rather than raised, so an
    older cached file can still be used by the caller.
    """
    import os  # local import: the file itself only imports ``path`` from ``os``

    try:
        url = csse_github_covid19_url + confirmed_global_cases_url
        csse_data = pd.read_csv(url)
        # Make sure the nested cache directory exists; without it ``to_csv``
        # fails and the error would only show up as a printed message.
        cache_dir = path.dirname(confirmed_global_cases_url)
        if cache_dir:
            os.makedirs(cache_dir, exist_ok=True)
        csse_data.to_csv(confirmed_global_cases_url, index=False)
        print("Data downloaded and updated succesfully.")
    except Exception as E:  # deliberate catch-all: keep the notebook running
        print("Error! ", E)
def filter_out_countries(df, min_cases=100):
    """Keep only countries whose peak case count exceeds ``min_cases``.

    Args:
        df: frame with 'Country/Region' and 'Infected' columns.
        min_cases: threshold; a country is kept when the maximum of its
            'Infected' values is strictly greater than this.

    Returns:
        A filtered frame with an extra 'Infected_now' column holding each
        country's maximum 'Infected' value. The input frame is not modified.
    """
    # The string 'max' selects pandas' own group-wise maximum; passing the
    # builtin ``max`` is deprecated.
    peak_cases = df.groupby('Country/Region')['Infected'].transform('max')
    # ``assign`` returns a new frame, so the caller's frame keeps its columns.
    df = df.assign(Infected_now=peak_cases)
    return df[df['Infected_now'] > min_cases]
def transform_covid19_global_data(df):
    """Reshape the wide CSSE time series into long format.

    Every column other than 'Province/State', 'Country/Region', 'Lat' and
    'Long' is treated as a date column: its name goes into 'Date' (parsed
    with the ``%m/%d/%y`` format) and its value into 'Infected'.
    """
    id_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']
    long_df = df.melt(id_vars=id_columns, var_name='Date', value_name='Infected')
    long_df['Date'] = pd.to_datetime(long_df['Date'], format='%m/%d/%y')
    return long_df
def create_list_of_days(df):
    """Return every day from the earliest to the latest 'Date', inclusive.

    Args:
        df: frame with a datetime-typed 'Date' column.

    Returns:
        list: one entry per day, starting at the minimum of 'Date' and
        advancing in one-day steps. An empty list is returned when the
        column contains no valid date.
    """
    first_day = df['Date'].min()
    last_day = df['Date'].max()
    # An empty (or all-NaT) column yields NaT, which has no usable day count.
    if pd.isna(first_day) or pd.isna(last_day):
        return []
    numdays = (last_day - first_day).days
    return [first_day + datetime.timedelta(days=offset) for offset in range(numdays + 1)]
def transform_groupby_country(df):
    """Aggregate the long-format data to one row per country and date.

    'Lat' and 'Long' are renamed to 'Lat_df' and 'Long_df', then all numeric
    columns are summed per ('Country/Region', 'Date') pair. Note that the
    coordinate columns are summed as well.

    Args:
        df: long-format frame with 'Country/Region' and 'Date' columns.

    Returns:
        A new frame with the group keys as regular columns. The input frame
        is left unchanged.
    """
    renamed = df.rename(columns={'Long': 'Long_df', 'Lat': 'Lat_df'})
    grouped = renamed.groupby(by=['Country/Region', 'Date'], as_index=False)
    # numeric_only=True keeps text columns such as 'Province/State' out of
    # the sum; recent pandas versions no longer drop them automatically.
    return grouped.sum(numeric_only=True)
def add_iso3_code(df):
    """Attach ISO3 codes and merged coordinates to the per-country data.

    The lookup table is left-joined on 'Country/Region' == 'Combined_Key'.
    'Lat_merged' and 'Long_merged' take the lookup coordinates and fall back
    to 'Lat_df' / 'Long_df' where the lookup has none; the four source
    coordinate columns are then dropped. Finally a fixed set of countries
    gets a hard-coded ISO3 code, and Canada, China and Australia get
    hard-coded coordinates.
    """
    lookup = get_lookup_table()
    lookup_selected = lookup[['Combined_Key', 'Lat', 'Long_', 'iso3']].drop_duplicates()
    df = df.merge(lookup_selected, left_on='Country/Region',
                  right_on='Combined_Key', how='left')

    df['Lat_merged'] = df['Lat'].fillna(df['Lat_df'])
    df['Long_merged'] = df['Long_'].fillna(df['Long_df'])
    df.drop(columns=['Lat', 'Long_', 'Lat_df', 'Long_df'], inplace=True)

    # Country name -> ISO3 code written regardless of what the merge found.
    iso3_overrides = {
        'Canada': 'CAN',
        'China': 'CHN',
        'Australia': 'AUS',
        'US': 'USA',
        'Belize': 'BLZ',
        'Botswana': 'BWA',
        'Burundi': 'BDI',
        'Malawi': 'MWI',
        'Mauritania': 'MRT',
        'Sierra Leone': 'SLE',
        'South Sudan': 'SSD',
        'Western Sahara': 'ESH',
        'Yemen': 'YEM',
        'Sao Tome and Principe': 'STP',
    }
    for country_name, iso3 in iso3_overrides.items():
        df.loc[df['Country/Region'] == country_name, 'iso3'] = iso3

    # Country name -> (latitude, longitude) written over the merged values.
    coordinate_overrides = {
        'Canada': (56.1304, -106.3468),
        'China': (35.8617, 104.1954),
        'Australia': (-25.274, 133.7751),
    }
    for country_name, (latitude, longitude) in coordinate_overrides.items():
        is_country = df['Country/Region'] == country_name
        df.loc[is_country, 'Long_merged'] = longitude
        df.loc[is_country, 'Lat_merged'] = latitude
    return df
def add_trajectory(df):
    """Add growth-trajectory columns per country (or per US state).

    Trajectory and if we're winning against covid19, based on
    https://www.youtube.com/watch?v=54XLXg4fYsc

    The frame is grouped by 'Country/Region' when that column exists and by
    'Province State' otherwise. It is sorted in place by (group, 'Date') and
    gains these columns:

    * 'Log infected': natural log of 'Infected'.
    * 'New daily infected': day-over-day increase of 'Infected' within the
      group, with negative values replaced by 0. The first row of every
      group is NaN.
    * 'New weekly infected': rolling 7-row sum of the daily values within
      the group.
    * 'Log new weekly infected': natural log of the weekly value.
    * 'Slope infected': change of the log weekly value divided by the change
      of 'Log infected', both taken within the group.

    Args:
        df: frame with 'Date', 'Infected' and one of the two group columns.

    Returns:
        The same frame object, modified in place.

    Raises:
        KeyError: if neither group column is present.
    """
    # Pick the grouping column explicitly instead of relying on a bare
    # ``except`` around the sort.
    group_col = 'Country/Region' if 'Country/Region' in df.columns else 'Province State'
    df.sort_values(by=[group_col, 'Date'], inplace=True)

    df['Log infected'] = np.log(df['Infected'])

    # Difference within each group so the first row of one country is never
    # compared with the last row of the previous country.
    daily_increase = df.groupby(group_col)['Infected'].diff()
    df['New daily infected'] = daily_increase.clip(lower=0)

    # https://stackoverflow.com/questions/13996302/python-rolling-functions-for-groupby-object
    df['New weekly infected'] = (
        df.groupby(group_col)['New daily infected']
        .transform(lambda daily: daily.rolling(7).sum())
    )
    # A weekly sum of 0 yields -inf here, exactly as np.log defines it.
    df['Log new weekly infected'] = np.log(df['New weekly infected'])

    log_weekly_change = df.groupby(group_col)['Log new weekly infected'].diff()
    log_total_change = df.groupby(group_col)['Log infected'].diff()
    df['Slope infected'] = log_weekly_change / log_total_change
    return df
def add_population(df):
    """Join population figures and derive per-capita infection rates.

    Population is summed per 'Combined_Key' from the lookup table and merged
    onto ``df`` by that key (default inner merge). Two columns are added:
    'Infected_per_k' (cases per 1000 inhabitants) and 'Infected_per_c'
    (cases per 100 inhabitants).
    """
    lookup = get_lookup_table()
    population_by_key = (
        lookup[['Combined_Key', 'Population']]
        .groupby('Combined_Key')
        .sum()
    )
    merged = df.merge(population_by_key, on='Combined_Key')
    infected = merged['Infected']
    population = merged['Population']
    merged['Infected_per_k'] = infected * 1000 / population
    merged['Infected_per_c'] = infected * 100 / population
    return merged
def update_transform_covi19_data():
    """Refresh the global data and run it through the full preparation chain.

    After the download attempt the cached CSV is read and passed, in order,
    through the reshaping, per-country aggregation, ISO3 lookup, trajectory
    and population steps. The fully prepared frame is returned.
    """
    update_covid19_global_data()
    prepared = pd.read_csv(confirmed_global_cases_url)
    steps = (
        transform_covid19_global_data,
        transform_groupby_country,
        add_iso3_code,
        add_trajectory,
        add_population,
    )
    for step in steps:
        prepared = step(prepared)
    return prepared
def transform_data(df):
    """Return only the columns of the testing dataset used by the plots."""
    wanted_columns = [
        'iso_code',
        'location',
        'date',
        'total_cases',
        'new_cases',
        'new_cases_per_million',
        'total_cases_per_million',
        'total_tests_per_thousand',
        'new_tests',
        'total_deaths',
        'total_deaths_per_million',
    ]
    return df[wanted_columns]
def get_transform_data():
    """Load the testing dataset and prepare it for plotting.

    The CSV at ``covid19_testing_data`` is read, reduced to the plotted
    columns, and most columns are renamed to human-readable labels
    ('iso_code' and 'date' keep their names). Gaps in 'Total tests per
    thousand' are forward-filled within each 'Location'.

    Returns:
        pandas.DataFrame: the prepared frame.
    """
    df = pd.read_csv(covid19_testing_data)
    df = transform_data(df)
    readable_names = {
        'total_tests_per_thousand': 'Total tests per thousand',
        'new_tests': 'New tests',
        'total_deaths': 'Total deaths',
        'total_deaths_per_million': 'Total deaths per million',
        'location': 'Location',
        'total_cases': 'Total cases',
        'new_cases': 'New cases',
        'new_cases_per_million': 'New cases per million',
        'total_cases_per_million': 'Total cases per million',
    }
    df = df.rename(columns=readable_names)
    # GroupBy.ffill replaces the deprecated fillna(method='ffill') and still
    # fills only within one location.
    df['Total tests per thousand'] = df.groupby('Location')['Total tests per thousand'].ffill()
    return df
def sigmoid(x):
    """Return the logistic function 1 / (1 + e^(-x)) of ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator
def get_states_data(return_='state'):
    """Download the US time series and return it in long format.

    Args:
        return_: 'state' to aggregate per state, or 'FIPS' to aggregate per
            (state, FIPS code). The value is matched case-insensitively.

    Returns:
        pandas.DataFrame: long-format frame with 'Province State' (plus
        'FIPS' in FIPS mode), 'Date', 'Infected' and the columns added by
        ``add_trajectory``.

    Raises:
        ValueError: if ``return_`` is neither 'state' nor 'FIPS'.
    """
    mode = str(return_).lower()
    # Reject unknown modes up front; continuing would only fail later with
    # an unrelated KeyError on the missing 'Date' column.
    if mode not in ('state', 'fips'):
        raise ValueError("What did you type? Please only state or fips")

    df = pd.read_csv(csse_github_covid19_url + covid19_US_data)
    df = df[df['Country_Region'] == 'US']

    unused_columns = ['Admin2', 'iso3', 'iso2', 'UID', 'code3', 'Lat', 'Long_']
    if mode == 'fips':
        id_columns = ['Province_State', 'FIPS']
    else:
        unused_columns.append('FIPS')
        id_columns = ['Province_State']

    df = df.drop(columns=unused_columns)
    # numeric_only=True keeps the remaining text columns out of the sum so
    # that only the per-date counts are melted below.
    df = df.groupby(id_columns, as_index=False).sum(numeric_only=True)
    df = df.melt(id_vars=id_columns, var_name='Date', value_name='Infected')

    df['Date'] = pd.to_datetime(df['Date'], format='%m/%d/%y')
    df = df.rename(columns={'Province_State': 'Province State'})
    df = add_trajectory(df)
    return df
def covid_world_same_days():
    """Refresh the cached global time series and read it back from disk.

    NOTE(review): the loaded frame is neither processed nor returned, so the
    function always returns None. It looks unfinished; confirm the intent.
    """
    update_covid19_global_data()
    global_cases = pd.read_csv(confirmed_global_cases_url)
I generally tried to choose plot types that fit the data and to keep the plots readable for colorblind people by using mostly blue and orange and the viridis color scale. I also used different, hopefully fitting, kinds of plots to present the data. By asking different people for their opinions, I was able to remove a bad plot (an interactive pie chart whose slices were countries, with the size of each slice determined by its total cases per thousand) and adjusted the color of the first plot based on their feedback.
The data comes from the COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University and from "Data on COVID-19 (coronavirus) confirmed cases, deaths, and tests" (all countries, updated daily) by Our World in Data: https://ourworldindata.org/coronavirus
# Total cases per thousand for a single country, drawn with a shaded band of
# +/- one rolling 7-row standard deviation around the curve.
df = get_transform_data()
df['Total cases per thousand'] = df['Total cases per million']/1000
# Change here the Country you would like to see!
country = 'Switzerland'
#
# .copy() gives an independent frame, so adding the 'Standard Deviation'
# column below does not trigger pandas' SettingWithCopyWarning.
df = df[df['Location']==country].copy()
last_update = np.max(df['date'])
# Start the curve at the first date with more than 10 total cases.
first_case = np.min(df[df['Total cases'] > 10]['date'])
df = df[df['date']>=first_case].copy()
df['Standard Deviation'] = df['Total cases per thousand'].rolling(7).std().fillna(0)
fig = go.Figure()
# Upper edge of the band (drawn in white so only the fill is visible).
fig.add_trace(go.Scatter(x=df['date'], y=df['Total cases per thousand'] + df['Standard Deviation'],
                         mode='lines',
                         marker=dict(color='white'),
                         name='')
              )
# Lower edge of the band; 'tonexty' fills the area up to the previous trace.
fig.add_trace(go.Scatter(x=df['date'], y=df['Total cases per thousand'] - df['Standard Deviation'],
                         mode='lines',
                         name='',
                         marker=dict(color='white'),
                         fill='tonexty',
                         fillcolor='lightblue')
              )
# The curve itself, drawn last so it sits on top of the band.
fig.add_trace(go.Scatter(x=df['date'], y=df['Total cases per thousand'],
                         mode='lines',
                         marker=dict(color='darkblue', size=2),
                         name='Total cases per thousand'
                         )
              )
fig.update_layout(title_text='Total cases per thousand in ' + country +' with a rolling (7) Standard Deviation. <br>Last updated on ' + last_update,
                  showlegend=True, yaxis_title='Cases per thousand',
                  template='simple_white', hovermode='x'
                  )
fig.show()
import plotly.express as px
# Grouped bar chart: total cases and total tests per thousand on the most
# recent date, for countries above both thresholds below.
data = get_transform_data()
last_update = np.max(data['date'])
# .copy() so the derived column is added to an independent frame instead of
# a filtered slice (avoids pandas' SettingWithCopyWarning).
data = data[data['date']==last_update].copy()
data['Total cases per thousand'] = data['Total cases per million']/1000
# Keep only countries with more than 30 tests and more than 3 cases per thousand.
data = data[data['Total tests per thousand']>30]
data = data[data['Total cases per thousand']>3]
fig = go.Figure(data=[
    go.Bar(name='Total cases per thousand', x=data['Location'], y=data['Total cases per thousand'],
           text=data['Total cases per thousand'], textposition='auto'),
    go.Bar(name='Total tests per thousand', x=data['Location'], y=data['Total tests per thousand'],
           text=data['Total tests per thousand'], textposition='outside'),
])
# Shorten the bar labels; this also sets the label position of both traces
# to 'outside', overriding the per-trace values given above.
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
# Change the bar mode
fig.update_layout(barmode='group',
                  title_text='Total cases and tests per thousand for selected countries.<br>Last updated on ' + last_update,
                  showlegend=True, yaxis_title='Cases and tests per thousand',
                  template='simple_white'
                  )
fig.show()
# World map of total cases per million on the most recent date.
data = get_transform_data()
last_update = np.max(data['date'])
scale = 4
# Round the plotted column in place with missing values set to 0. The result
# was previously written into 'Total deaths per million', which overwrote
# the death figures with case figures and left this column untouched.
data['Total cases per million'] = np.round(data['Total cases per million'].fillna(0), 2)
data = data[data['date']==last_update]
fig = px.choropleth(data,
                    locations='iso_code',
                    color="Total cases per million",
                    hover_name="Location",
                    projection="natural earth",
                    color_continuous_scale=px.colors.sequential.Viridis,
                    range_color=(0, 7000)
                    )
fig.update_layout(
    title_text='Total cases per million<br>Last updated on ' + last_update,
    showlegend=True
)
fig.show()
# World map of total deaths per million on the most recent date.
scale = 4
data = get_transform_data()
last_update = data['date'].max()
# Missing death figures are shown as 0; values are rounded to two decimals.
data['Total deaths per million'] = data['Total deaths per million'].fillna(0).round(2)
data = data[data['date'] == last_update]
fig = px.choropleth(
    data,
    locations='iso_code',
    color="Total deaths per million",
    hover_name="Location",
    projection="natural earth",
    color_continuous_scale=px.colors.sequential.Viridis,
    range_color=(0, 800),
)
fig.update_layout(title_text='Total Deaths per Million<br>Last updated on ' + last_update,
                  showlegend=True)
fig.show()
I wanted to recreate the plot from https://www.youtube.com/watch?v=54XLXg4fYsc in Plotly, and this is the result. It shows how countries are doing in the fight against COVID-19.
# This plot shows if a country is "winning" against Covid19
# It's heavily based on this video: https://www.youtube.com/watch?v=54XLXg4fYsc
# `data` is the module-level frame loaded earlier in the file; it is only
# used here to label the figure with the date of the latest update.
last_update = np.max(data['date'])
df = update_transform_covi19_data()
df = filter_out_countries(df, 30000)
df = df.sort_values(by=['Date', 'Country/Region'])
# The animation frames need string labels, in chronological row order.
df['Date'] = df['Date'].dt.strftime('%m/%d/%y')
fig = px.scatter(df,
                 y='New weekly infected',
                 x="Infected",
                 animation_frame="Date",
                 hover_name="Country/Region",
                 text='iso3'
                 )
# Guide lines for constant exponential growth. If the total doubles every T
# days, the cases added during the last 7 days are a fixed share
# 1 - 2**(-7/T) of the total, i.e. a line of slope 1 on the log-log axes.
# The former hand-typed end points did not follow this relation.
reference_x = [1, 10000000]
for doubling_days, line_color in [(7, 'red'), (14, 'orange'), (28, 'green'), (56, 'lightgreen')]:
    weekly_share = 1 - 2 ** (-7.0 / doubling_days)
    fig.add_trace(go.Scatter(x=reference_x, y=[weekly_share * total for total in reference_x],
                             name='Doubling of cases every {} days'.format(doubling_days),
                             line=dict(color=line_color, width=2, dash='dot')
                             ))
fig.update_traces(textposition='top center')
# On log axes the ranges are exponents of 10: y spans 1e2..1e6, x spans 1e3..1e7.
fig.update_layout(xaxis_type="log", yaxis_type="log", yaxis=dict(range=[2, 6]), xaxis=dict(range=[3, 7]))
fig.update_layout(
    title_text='Log Scale of Weekly Cases vs. Total Cases<br>Last updated on ' + last_update,
    showlegend=True, template='simple_white', hovermode='x'
)
fig.show()
This plot is the same as the one above, but I wanted to compare how countries have dealt with COVID-19 from a common starting point. Thus, the starting day is the same for every country: as soon as a country has reached its 1000th case, it appears in the plot. Because this leads to a different amount of data for different countries**, we "freeze" the point as soon as we run out of data for a country.
**Data logging started for every country on 22.1.2020. However, each country reached its 1000th case on a different day, so we ignore the data before the 1000th case. This leaves fewer days of data for countries that took longer to reach the 1000th case than others.
# Same trajectory plot, but every country starts on the day it reached 1000
# cases; the animation frame is the number of days since that day.
# NOTE: `last_update` is the module-level value set earlier in the file.
df = update_transform_covi19_data()
# .copy() so the column assignments below act on an independent frame.
df = filter_out_countries(df, 30000).copy()
# Blank out everything before the 1000th case. One shared mask keeps the
# three columns consistent (the weekly column used '>' instead of '>=').
reached_1000 = df['Infected'] >= 1000
df['Date'] = df['Date'].where(reached_1000)
df['New weekly infected'] = df['New weekly infected'].where(reached_1000)
df['Infected'] = df['Infected'].where(reached_1000)
# Missing dates sort last within each country, so the blanked rows end up
# after that country's valid rows ...
df = df.sort_values(by=['Country/Region', 'Date'])
# ... and forward-filling them repeats the last known point ("freeze").
# The fill is done per country so values never leak from one country into
# the next; DataFrame.fillna(method='ffill') is also deprecated.
filled = df.groupby('Country/Region').ffill()
df[filled.columns] = filled
df['Days since the 1000th case'] = df.groupby('Country/Region').cumcount()+1
fig = px.scatter(df,
                 y='New weekly infected',
                 x="Infected",
                 animation_frame="Days since the 1000th case",
                 hover_name="Country/Region",
                 text='iso3'
                 )
# Guide lines for constant exponential growth. If the total doubles every T
# days, the cases added during the last 7 days are a fixed share
# 1 - 2**(-7/T) of the total, i.e. a line of slope 1 on the log-log axes.
# The former hand-typed end points did not follow this relation.
reference_x = [1, 10000000]
for doubling_days, line_color in [(7, 'red'), (14, 'orange'), (28, 'green'), (56, 'lightgreen')]:
    weekly_share = 1 - 2 ** (-7.0 / doubling_days)
    fig.add_trace(go.Scatter(x=reference_x, y=[weekly_share * total for total in reference_x],
                             name='Doubling of cases every {} days'.format(doubling_days),
                             line=dict(color=line_color, width=2, dash='dot')
                             ))
fig.update_traces(textposition='top center')
# On log axes the ranges are exponents of 10.
fig.update_layout(xaxis_type="log", yaxis_type="log", yaxis=dict(range=[1, 6]), xaxis=dict(range=[2.5, 7]))
fig.update_layout(
    title_text='Log Scale of Weekly Cases vs. Total Cases. The first day for a country is the day with the 1000th case. <br>Updated on ' + last_update,
    showlegend=False, template='simple_white', hovermode='x'
)
fig.show()
# Line chart comparing the cumulative cases reported by China and Italy.
# NOTE: `last_update` is the module-level value set earlier in the file.
df = filter_out_countries(update_transform_covi19_data(), 10000)
df = df.sort_values(by=['Date', 'Country/Region'])
df['Date'] = df['Date'].dt.strftime('%m/%d/%y')
# Share of the total that was reported during the last week.
df['WI_INF'] = df['New weekly infected']/df['Infected']

df_china = df[df['Country/Region']=='China']
# NOTE(review): despite its name, `df_US` holds the rows for Italy.
df_US = df[df['Country/Region']=='Italy']

fig = go.Figure()
china_trace = go.Scatter(x=df_china["Date"], y=df_china['Infected'],
                         mode='lines', name='Reported numbers by China')
fig.add_trace(china_trace)
italy_trace = go.Scatter(x=df_US["Date"], y=df_US['Infected'],
                         mode='lines', name='Reported numbers by Italy')
fig.add_trace(italy_trace)
fig.update_layout(title_text='Cases reported by Italy and China.<br> Updated on ' + last_update,
                  showlegend=True,
                  hovermode='x',
                  template='simple_white')
fig.show()